 1.Hive函数之SQL面试题
   
   1).连续7天登录的用户
   -- 数据。uid dt status(1 正常登录，0 异常)
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 0
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
   -- 建表语句
-- Login log: one row per uid per day; status 1 = normal login, 0 = abnormal.
create table ulogin (
    uid    int,
    dt     date,
    status int
)
row format delimited
fields terminated by ' ';

-- Load the sample data from the local filesystem.
load data local inpath '/home/hadoop/data/ulogin.dat'
into table ulogin;
   -- 连续值的求解，面试中常见的问题。这也是同一类，基本都可按照以下思路进行
   -- 1).使用 row_number 在组内给数据编号(rownum)
   -- 2).用日期(dt)减去 rownum 得到 gid：连续的日期与递增的 rownum 同步增长，相减结果相同，因此同一段连续日期共享同一个 gid，可作为后面分组计算的依据
   -- 3).根据求得的gid，作为分组条件，求最终结果
   select uid, dt,
date_sub(dt, row_number() over (partition by uid order
by dt)) gid
from ulogin
where status=1;
    
	select uid, count(*) logincount
from (select uid, dt,
date_sub(dt, row_number() over (partition by
uid order by dt)) gid
from ulogin
where status=1) t1
group by uid, gid
having logincount>=7;
 
 2).编写sql语句实现每班前三名，分数一样并列，同时求出前三名按名次排序的分差
   
   -- 数据。sid class score
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
   
   -- 待求结果数据如下：
class score  rank  lagscore
1901   90      1      0
1901   90      1      0
1901   83      2     -7
1901   60      3     -23
1902   99      1      0
1902   87      2     -12
1902   67      3     -20
   
   -- 建表语句
-- Student scores: one row per student (sno) with class and score.
create table stu (
    sno   int,
    class string,
    score int
)
row format delimited
fields terminated by ' ';

-- Load the sample data from the local filesystem.
load data local inpath '/home/hadoop/data/stu.dat'
into table stu;
   
   -- 求解思路：
   -- 1).上排名函数，分数一样并列，所以用dense_rank
   -- 2).将上一行数据下移，相减即得到分数差
   -- 3).处理 NULL
   select sno,class, score,
dense_rank() over (partition by class order by score desc) as rank
from stu;
   
   select class, score,
dense_rank() over (partition by class order by score desc) as rank,
nvl(score - lag(score) over (partition by class order by score desc),0) 
as lag_score
from stu;
   
 3).行 <=> 列
    
	-- 数据。id1 id2 flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8
    -- 编写sql实现如下结果
id1 id2   flag
a    b   2|1|3
c    d    6|8
   -- 创建表 & 加载数据
-- Row-to-column demo data: each (id1, id2) pair owns several flag values,
-- one per row.
create table rowline2 (
    id1  string,
    id2  string,
    flag int
)
row format delimited
fields terminated by ' ';

load data local inpath '/home/hadoop/data/data2.dat'
into table rowline2;
  
  -- 第一步 将元素聚拢
select id1, id2, collect_set(flag) flag from rowline2 group by
id1, id2;
select id1, id2, collect_list(flag) flag from rowline2 group
by id1, id2;
select id1, id2, sort_array(collect_set(flag)) flag from
rowline2 group by id1, id2;

   -- Step 2: join the collected elements into a single "|"-separated string.
   -- NOTE: the next statement fails ON PURPOSE -- collect_set(flag) yields
   -- array<int>, but concat_ws only accepts string or array<string>.
   select id1, id2, concat_ws("|", collect_set(flag)) flag
from rowline2
group by id1, id2;
  -- The fix: cast flag to string before collecting, so concat_ws receives
  -- array<string>. (Reported error: CONCAT_WS must be "string or array<string>")
  select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;
  
  -- 创建表 rowline3
create table rowline3 as
select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;
  -- 第一步：将复杂的数据展开
select explode(split(flag, "\\|")) flat from rowline3;
  -- 第二步：lateral view 后与其他字段关联
select id1, id2, newflag
from rowline3 lateral view explode(split(flag, "\\|")) t1 as
newflag;
  
  lateralView: LATERAL VIEW udtf(expression) tableAlias AS
columnAlias (',' columnAlias)*
fromClause: FROM baseTable (lateralView)*

  -- 数据：id course
1 java
1 hadoop
1 hive
1 hbase
2 java
2 hive
2 spark
2 flink
3 java
3 hadoop
3 hive
3 kafka
  
  -- 建表加载数据
-- Course enrollment: one row per (student id, course) pair.
create table rowline1 (
    id     string,
    course string
)
row format delimited
fields terminated by ' ';

load data local inpath '/home/hadoop/data/data1.dat'
into table rowline1;
  
  -- 编写sql，得到结果如下(1表示选修，0表示未选修)
 id   java  hadoop hive  hbase  spark  flink  kafka
  1     1      1     1      1      0      0      0
  2     1      0     1      0      1      1      0
  3     1      1     1      0      0      0      1
  
 -- 使用case when；group by + sum
 select id,
sum(case when course="java" then 1 else 0 end) as java,
sum(case when course="hadoop" then 1 else 0 end) as hadoop,
sum(case when course="hive" then 1 else 0 end) as hive,
sum(case when course="hbase" then 1 else 0 end) as hbase,
sum(case when course="spark" then 1 else 0 end) as spark,
sum(case when course="flink" then 1 else 0 end) as flink,
sum(case when course="kafka" then 1 else 0 end) as kafka
from rowline1
group by id;
  
  小结：
  case when + sum + group by
  collect_set、collect_list、concat_ws
  sort_array
  explode + lateral view
  
  